### Replication file for Reuning & Plutzer
### "Valid vs. Invalid Straightlining: 
### The Complex Relationship Between Straightlining and Data Quality"
### Published in SURVEY RESEARCH METHODS


### All packages below can be installed with
# install.packages(c("ggplot2", "haven", "readstata13", "psych", "reshape2", "scales", "gridExtra"))
library(ggplot2)
library(haven)
library(readstata13)
library(psych)
library(reshape2)
library(scales)
library(gridExtra)

### Needs below data in working directory
df <- read_dta("HRS-Simdata-jan26.dta")  ### Simulated Data
full_df <- read.dta13("H06LB_R.dta") ### Actual data

### Rescales variables as necessary
df$diener_eta <-  scale(df$diener_smooth)+.6
df$discrim_scaled <- 1*scale(df$discrim_total)
df$age_scaled <- scale(df$age_in_2006)



# Simulation values #
N <- nrow(df)
items <- 5 
sims <- 500 # number of simulations

###### Values for baseline
lambdas <-  rep(.8, items)
sigma <- rep(.7, items)
cuts <- c(-1, -.3, 0, .3, 1)

##### Estimate Example Data
set.seed(1)
x_star <- df$diener_eta %*% t(lambdas) +
  matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)
range(x_star)


X <- apply(x_star, 2, function(x)
  as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))


tmp.df <- full_df[,grepl("klb003", names(full_df))]

pdf("figure_3.pdf", height=6, width=6)
par(mar=c(4,4,1,1))


plot(density(rowSums(tmp.df), na.rm=T), lty=1,  xlab="Sum of Responses", main='')
lines(density(rowSums(X)), lty=2)

abline(h=seq(0, 0.08, by=0.02),
       col='gray80', lty=3, lwd=1)
legend("topleft", legend=c("2006 HRS", "Simulated Data"),
       lty=1:2, bty='o', box.lwd = 1)

dev.off()


#### Table 1 ####
## Baseline 
coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X)
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  mod_full <- lm(score~ discrim_scaled + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(score~ discrim_scaled + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)

  mod_sling <- glm(sling~discrim_scaled + degree + age_scaled +
                     female + black + hispanic, data=df, family=binomial)
  coefs_sling[,ii] <- coef(mod_sling)[-1]

}

# Actual table values 
100-mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(pct_sling)*100
quantile(pct_sling, c(0.025, 0.975))*100
mean(unlist(alphas))


rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_sling) <- names(coef(mod_sling))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
plot_df <- rbind(df_full, df_lim)
plot_df$sim <- 'Baseline/Baseline'
plot_df_full <- plot_df


## Baseline/high ###
lambdas <-  rep(.8, items)
sigma <- rep(.5, items)
cuts <- c(-1, -.3, 0, .3, 1)


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X)
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  mod_full <- lm(score~ discrim_scaled + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(score~ discrim_scaled + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)

  mod_sling <- glm(sling~discrim_scaled + degree + age_scaled +
                     female + black + hispanic, data=df, family=binomial)
  coefs_sling[,ii] <- coef(mod_sling)[-1]

}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_sling) <- names(coef(mod_sling))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
plot_df <- rbind(df_full, df_lim)
plot_df$sim <- 'Baseline/High'
plot_df_full <- rbind(plot_df, plot_df_full)


## high/baseline ##
lambdas <-  rep(.9, items)
sigma <- rep(.7, items)
cuts <- c(-1, -.3, 0, .3, 1)


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X)
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  mod_full <- lm(score~ discrim_scaled + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(score~ discrim_scaled + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)

  mod_sling <- glm(sling~discrim_scaled + degree + age_scaled +
                     female + black + hispanic, data=df, family=binomial)
  coefs_sling[,ii] <- coef(mod_sling)[-1]

}

# Actual table values 

100-mean(pct_sling)*100
100*mean(pct_sling)
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_sling) <- names(coef(mod_sling))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
plot_df <- rbind(df_full, df_lim)
plot_df$sim <- 'High/Baseline'
plot_df_full <- rbind(plot_df, plot_df_full)


## high/high ##
lambdas <-  rep(.9, items)
sigma <- rep(.5, items)
cuts <- c(-1, -.3, 0, .3, 1)


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X)
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  mod_full <- lm(score~ discrim_scaled + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(score~ discrim_scaled + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)

  mod_sling <- glm(sling~discrim_scaled + degree + age_scaled +
                     female + black + hispanic, data=df, family=binomial)
  coefs_sling[,ii] <- coef(mod_sling)[-1]

}

# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_sling) <- names(coef(mod_sling))[-1]


#### Figure  4 ####

## Collects data from ^ to plot
df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
plot_df <- rbind(df_full, df_lim)
plot_df$sim <- 'High/High'
plot_df_full <- rbind(plot_df, plot_df_full)
plot_df_full<- plot_df_full[plot_df_full$sim %in% c("Baseline/Baseline", "High/High"),]
plot_df_full$x <- factor(plot_df_full$sim, levels=rev(c("Baseline/Baseline", "High/High")),
                         labels=c('High Validity\nHigh Relability', 'Baseline'))
levels(plot_df_full$Var1) <- c('Total\nDiscrimination', 'College\nDegree', 'Age', 'Female', 'Black', 'Hispanic')
plot_df_full$type <- ordered(plot_df_full$type, levels=rev(c("Full Sample", "No Straightliners")))
ggplot(plot_df_full, aes(x=x, y=value, fill=type)) +
  theme_minimal() + theme(legend.position = 'bottom',
                          panel.border = element_rect(linetype = 'solid', fill=NA)) +
  geom_boxplot() + scale_fill_brewer("Sample:",type = 'div', palette=5) +
  facet_grid(~Var1, scales='free') + coord_flip() +
  labs(y='Coefficient', x='Simulation') +
  scale_y_continuous(labels=number_format(accuracy=.1)) +
  geom_hline(yintercept=0, linetype='dashed', color='black')
ggsave('figure_4.pdf', height=8, width=10, units='in')
##

## Written examples ##
by(plot_df_full$value, list(plot_df_full$type, plot_df_full$sim, plot_df_full$Var1), summary)

(1.58 - 1.36)/1.58
(1.79 - 1.37)/1.79
(2.12 - 1.52)/2.12
(0.53 - 0.32)/0.53

(mean(plot_df_full$value[plot_df_full$type == 'Full Sample' &
                          plot_df_full$sim=='High/High' &
                          plot_df_full$Var1=='Female' ]) -
  mean(plot_df_full$value[plot_df_full$type == 'No Straightliners' &
                            plot_df_full$sim=='High/High' &
                            plot_df_full$Var1=='Female' ]))/
  mean(plot_df_full$value[plot_df_full$type == 'Full Sample' &
                            plot_df_full$sim=='High/High' &
                            plot_df_full$Var1=='Female' ])


#### Table 2 ####
##  1 reversed ##
lambdas <-  c(rep(.8, items-1), -.8)
sigma <- rep(.7, items)
cuts <- c(-1, -.3, 0, .3, 1)



pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)


  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))




## 2 reversed ##
lambdas <-  c(rep(.8, items-2), -.8, -.8)
sigma <- rep(.7, items)
cuts <- c(-1, -.3, 0, .3, 1)



pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=6, ncol=sims)
alphas <- numeric(sims)

for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)


  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))



#### Table A1 #### 
## Baseline ##
df$diener_eta <- df$diener_eta - .5
lambdas <-  c(rep(.8, items))
sigma <- rep(.7, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)


  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))




## Baseline - High ##
lambdas <-  c(rep(.8, items))
sigma <- rep(.5, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)



}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


## High - Baseline ##
lambdas <-  c(rep(.9, items))
sigma <- rep(.7, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)


}


100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


## High - High ##
lambdas <-  c(rep(.9, items))
sigma <- rep(.5, items)
cuts <- c(-1, -.5, .5, 1)



pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)


  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))





## Low - Baseline ##
lambdas <-  c(rep(.5, items))
sigma <- rep(.7, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))




##  Low - High ##
lambdas <-  c(rep(.5, items))
sigma <- rep(.5, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)



}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))






#### Table A2 ####
### 1 Reverse  Low - High ##
lambdas <-  c(rep(.9, items-1), -.9)
sigma <- rep(.5, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)


}

# Actual table values 
100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))






### 2 Reverse  Low - High ##
lambdas <-  c(rep(.9, items-2), -.9, -.9)
sigma <- rep(.5, items)
cuts <- c(-1, -.5, .5, 1)


pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
for(ii in 1:sims){
  set.seed(ii)
  x_star <- df$diener_eta %*% t(lambdas) +
    matrix(rnorm(N*items, sd=sigma[1]), nrow=N, ncol=items)

  # plot(density(x_star))
  # abline(v=cuts)
  X <- apply(x_star, 2, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$score <- rowSums(X)

  pct_sling[ii] <- mean(df$sling)



}

# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100

mean(unlist(alphas))


#### Figure 5 ####

p1 <- ggplot(df, aes(x=discrim_eta, y= 100*stat(density))) +
  geom_histogram(bins=12, fill='gray', color='black') +
  theme_minimal() +
  labs(y='Percent', x='Latent Variable - Number of Days')



lambdas <-  rep(.5, items)
sigma <- rep(.75, items)
cuts=c(.5, 3.5, 15.5)
bias=2
types=c(0.7, 0.15, 0.15)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))

res = matrix(NA, N, items)
set.seed(1)
for(kk in 1:items){
  res[,kk] = rpois(N, sigma[kk] * epsilon)
  res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
}

type <- sample(1:3, N, prob=types, replace = T)
x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
for(jj in 1:N){
  if(type[jj]==2){
    x_star[,jj] = 0
  } else if(type[jj]==3){
    x_star[,jj] = x_star[,jj] + bias
  }
}
x_star[x_star < 0] <- 0

p2 <-ggplot(data.frame(t(x_star)), aes(x=X1,y= 100*stat(density))) +
  geom_histogram(bins=12, fill='gray', color='black') +
  theme_minimal() +
  labs(y='Percent', x=expression(paste(X, '*')))
new_df <- data.frame("simulated"=x_star[1,],
                     "real"=df$discrim_eta)

qq.out <- qqplot(x=x_star[1,], y=df$discrim_eta, plot.it=FALSE)
qq.out <- as.data.frame(qq.out)
# Set the x and y limits
xylim <- range( c(qq.out$x, qq.out$y) )

# Generate the QQ plot
p3 <- ggplot(qq.out, aes( x= x, y = y)) +
  geom_point() +
  geom_abline( intercept=0, slope=1) +
  coord_fixed(ratio = 1, xlim=xylim, ylim = xylim) +
  xlab(expression(paste(X, '*'))) + ylab("Latent Variables") +
  theme_minimal()


p <- grid.arrange(p1, p2, p3, layout_matrix=matrix(c(1,3,2,3), 2, 2))

ggsave('figure_5.pdf', p,  height=8, width=8, units='in')


##### Table 6 ####
## sigma .75 cutpoints A:  ##


lambdas <-  rep(.5, items)
sigma <- rep(.75, items)
cuts=c(.5, 2.5, 10.5)
bias=2
types=c(0.7, 0.15, 0.15)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
coefs_lim2 <- matrix(NA, nrow=6, ncol=sims)

for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  df$sling_id <- X[,1]
  df$sling_id[df$sling==1] <- 0
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]

  mod_lim2 <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df, subset=sling_id<2 )
  coefs_lim2[,ii] <- coef(mod_lim2)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))



rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_lim2) <- names(coef(mod_full))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_lim2 <- as.data.frame(melt(coefs_lim2))

df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
df_lim2$type <- 'No Positive Straightliners'

plot_df <- rbind(df_full, df_lim, df_lim2)
plot_df$sim <- 'Sigma .75\nCutpoint A'
plot_df_full <- plot_df


## sigma .25 cutpoints A: ##


lambdas <-  rep(.5, items)
sigma <- rep(.25, items)
cuts=c(.5, 2.5, 10.5)
bias=2
types=c(0.7, 0.15, 0.15)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
coefs_lim2 <- matrix(NA, nrow=6, ncol=sims)

for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  df$sling_id <- X[,1]
  df$sling_id[df$sling==1] <- 0
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]

  mod_lim2 <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df, subset=sling_id<2 )
  coefs_lim2[,ii] <- coef(mod_lim2)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))

rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_lim2) <- names(coef(mod_full))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_lim2 <- as.data.frame(melt(coefs_lim2))

df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
df_lim2$type <- 'No Positive Straightliners'
plot_df <- rbind(df_full, df_lim, df_lim2)
plot_df$sim <- 'Sigma .25\nCutpoint A'
plot_df_full <- rbind(plot_df, plot_df_full)



## sigma .75 cutpoints B: ##


lambdas <-  rep(.5, items)
sigma <- rep(.75, items)
cuts=c(.5, 5.5, 20.5)
bias=2
types=c(0.7, 0.15, 0.15)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
coefs_lim2 <- matrix(NA, nrow=6, ncol=sims)

for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  df$sling_id <- X[,1]
  df$sling_id[df$sling==1] <- 0
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]

  mod_lim2 <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df, subset=sling_id<2 )
  coefs_lim2[,ii] <- coef(mod_lim2)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))

rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_lim2) <- names(coef(mod_full))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_lim2 <- as.data.frame(melt(coefs_lim2))

df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
df_lim2$type <- 'No Positive Straightliners'
plot_df <- rbind(df_full, df_lim, df_lim2)
plot_df$sim <- 'Sigma .75\nCutpoint B'
plot_df_full <- rbind(plot_df, plot_df_full)



## sigma .25 cutpoints B: ##


lambdas <-  rep(.5, items)
sigma <- rep(.25, items)
cuts=c(.5, 5.5, 20.5)
bias=2
types=c(0.7, 0.15, 0.15)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
coefs_lim2 <- matrix(NA, nrow=6, ncol=sims)

for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  df$sling_id <- X[,1]
  df$sling_id[df$sling==1] <- 0
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]

  mod_lim2 <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling_id<2 )
  coefs_lim2[,ii] <- coef(mod_lim2)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))


#### Figure B1 ####
## Uses data from ^ ##
rownames(coefs_full) <- names(coef(mod_full))[-1]
rownames(coefs_lim) <- names(coef(mod_full))[-1]
rownames(coefs_lim2) <- names(coef(mod_full))[-1]

df_full <- as.data.frame(melt(coefs_full))
df_lim <- as.data.frame(melt(coefs_lim))
df_lim2 <- as.data.frame(melt(coefs_lim2))

df_full$type <- 'Full Sample'
df_lim$type <- 'No Straightliners'
df_lim2$type <- 'No Positive Straightliners'
plot_df <- rbind(df_full, df_lim, df_lim2)
plot_df$sim <- 'Sigma .25\nCutpoint B'
plot_df_full <- rbind(plot_df, plot_df_full)


levels(plot_df_full$Var1) <- c('Total\nDiscrimination', 'College\nDegree', 'Age', 'Female', 'Black', 'Hispanic')
plot_df_full$type <- ordered(plot_df_full$type, levels=rev(c("Full Sample", "No Straightliners", "No Positive Straightliners")))
ggplot(plot_df_full[plot_df_full$Var1 == "Total\nDiscrimination",],
       aes(x=sim, y=value, fill=type)) +
  theme_minimal() + theme(legend.position = 'bottom',
                          panel.border = element_rect(linetype = 'solid', fill=NA)) +
  geom_boxplot() + scale_fill_brewer("Sample:",type = 'qual', palette=5) +
  # facet_grid(~Var1, scale='free') +
  coord_flip() +
  labs(y='Coefficient', x='Simulation') +
  scale_y_continuous(labels=number_format(accuracy=.1)) +
  geom_hline(yintercept=0, linetype='dashed', color='black')

ggsave('figure_b1.pdf', height=8, width=10, units='in')


#### Table 7 ####
## No overreporters with sigma .25 cutpoints B: ##


lambdas <-  rep(.5, items)
sigma <- rep(.25, items)
cuts=c(.5, 5.5, 20.5)
bias=2
types=c(0.85, 0.15, 0)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))


## All honest with sigma .25 cutpoints B: ##


lambdas <-  rep(.5, items)
sigma <- rep(.25, items)
cuts=c(.5, 5.5, 20.5)
bias=2
types=c(1, 0, 0)
epsilon = 16 + (-1 * abs(df$discrim_eta-15))


coefs_full <- matrix(NA, nrow=6, ncol=sims)
coefs_lim <- matrix(NA, nrow=6, ncol=sims)
pct_sling <- numeric(sims)
pct_sling_each <- matrix(0, nrow=length(cuts)+1, ncol=sims)
alphas <- numeric(sims)
coefs_sling <- matrix(NA, nrow=6, ncol=sims)
for(ii in 1:sims){
  set.seed(ii)
  for(kk in 1:items){
    res[,kk] = rpois(N, sigma[kk] * epsilon)
    res[df$discrim_eta > 15,kk] <- -res[df$discrim_eta>15, kk]
  }

  type <- sample(1:3, N, prob=types, replace = T)
  x_star <- t((df$discrim_eta) %*% t(lambdas)  + (res)) ## check
  for(jj in 1:N){
    if(type[jj]==2){
      x_star[,jj] = 0
    } else if(type[jj]==3){
      x_star[,jj] = x_star[,jj] + bias
    }
  }
  x_star[x_star < 0] <- 0


  X <- apply(x_star, 1, function(x)
    as.numeric(cut(x, breaks = c(-Inf, cuts, Inf), ordered_result = T)))

  tmp <- psych::alpha(X, keys=sign(lambdas))
  alphas[ii] <- tmp$total[1]
  tmp <- apply(X, 1, function(x) all(x[1]==x))
  df$sling <- 1*tmp
  tmp <- table(X[tmp,1])
  for(jj in 1:length(tmp)){
    pct_sling_each[as.numeric(names(tmp)[jj]), ii] <- tmp[jj]/nrow(df)
  }
  df$discrim_score <- scale(rowSums(X))

  mod_full <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                   female + black + hispanic, data=df)
  coefs_full[,ii] <- coef(mod_full)[-1]
  mod_lim <- lm(diener_smooth~ discrim_score + degree + age_scaled +
                  female + black + hispanic, data=df, subset=sling==0)
  coefs_lim[,ii] <- coef(mod_lim)[-1]
  pct_sling[ii] <- mean(df$sling)
}
# Actual table values 

100-mean(pct_sling)*100
mean(pct_sling)*100
apply(pct_sling_each, 1, mean)*100
mean(unlist(alphas))

